# Simple linear regression of 2020 subway ridership on month.
subway_ridership <- lm(subway_2020 ~ month, data = mta_data)

# Coefficient table restricted to term, estimate and p-value,
# rendered with values rounded to 3 digits.
broom::tidy(subway_ridership) %>%
  select(term, estimate, p.value) %>%
  knitr::kable(digits = 3)
term estimate p.value
(Intercept) 965079.87 0.000
month 32472.74 0.139
# To plot the regression line:
#  ggplot(mta_data, aes(month, subway_2020)) +
#  geom_point() +
#  stat_smooth(method = lm)


# Residuals-vs-predicted scatter plot for the subway model: attach the
# model's predictions and residuals to the data, then plot one against
# the other.
mta_data %>%
  modelr::add_predictions(subway_ridership) %>%
  modelr::add_residuals(subway_ridership) %>%
  ggplot(aes(x = pred, y = resid)) +
  geom_point() +
  labs(x = "Predicted value", y = "Residual")

# Simple linear regression of 2020 bus ridership on month.
bus_ridership <- lm(bus_2020 ~ month, data = mta_data)

# Full coefficient table (estimate, std.error, statistic, p.value).
broom::tidy(bus_ridership)
# A tibble: 2 x 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)  717060.    64077.     11.2  4.46e-24
2 month         26325.     8733.      3.01 2.82e- 3
# Now tidy the output, keeping only the intercept, slope, and p-values.
# Coefficient table for the bus model restricted to term, estimate and
# p-value, rendered with values rounded to 3 digits.
broom::tidy(bus_ridership) %>%
  select(term, estimate, p.value) %>%
  knitr::kable(digits = 3)
term estimate p.value
(Intercept) 717060.27 0.000
month 26324.69 0.003
# To plot the regression line:
#ggplot(mta_data, aes(month, bus_2020)) +
#  geom_point() +
#  stat_smooth(method = lm)

Row

Chart 1

# Line-and-marker chart of `value` by `month`, colored by `variable`,
# with `text_label` supplied as the trace text.
plot_ttest %>%
  plot_ly(
    x = ~month,
    y = ~value,
    color = ~variable,
    text = ~text_label,
    type = "scatter",
    mode = "lines+markers"
  ) %>%
  layout(
    title = "Monthly Average Ridership of Subway 2019 vs 2020",
    # The x axis is restricted to months 3 through 11.
    xaxis = list(title = "Months", range = c(3, 11)),
    yaxis = list(title = "Average Ridership"),
    legend = list(font = list(size = 10))
  )

Chart 2

# Chart 2: residuals of the subway model plotted against its predicted
# values, both attached to the data via modelr before plotting.
mta_data %>%
  modelr::add_predictions(subway_ridership) %>%
  modelr::add_residuals(subway_ridership) %>%
  ggplot(aes(x = pred, y = resid)) +
  geom_point() +
  labs(x = "Predicted value", y = "Residual")

Row

Chart 3

# Scatter plot of subway ridership (`value`) by `date`, colored by
# `variable`, with dotted red reference lines at three case-count
# milestone dates.
#
# The 50% transparency of the reference lines is set with the trace-level
# `opacity` attribute. plotly's scatter `line` list has no `opacity` or
# `alpha` entry, so placing those keys inside `line` had no effect.
plot_subway %>%
  plot_ly(
    x = ~date, y = ~value, type = "scatter", mode = "markers",
    color = ~variable, text = ~text_label
  ) %>%
  layout(
    title = "Subway Ridership Trends 2019 - 2020",
    # Tick labels show month/day only (year dropped).
    xaxis = list(title = "Month/Day", tickformat = "%m/%d"),
    yaxis = list(title = "Ridership")
  ) %>%
  add_lines(
    x = as.Date("2020-03-01"),
    line = list(dash = "dot", color = "red", width = 0.5),
    opacity = 0.5,
    name = "First case on 3/1"
  ) %>%
  add_lines(
    x = as.Date("2020-04-07"),
    line = list(dash = "dot", color = "red", width = 0.5),
    opacity = 0.5,
    name = "100K cases in NYC on 04/07"
  ) %>%
  add_lines(
    x = as.Date("2020-05-26"),
    line = list(dash = "dot", color = "red", width = 0.5),
    opacity = 0.5,
    name = "200K cases in NYC on 05/26"
  )

Chart 4

#plot_bus
# Scatter plot of bus ridership (`value`) by `date`, colored by
# `variable`, with dotted red reference lines at three case-count
# milestone dates.
#
# The 50% transparency of the reference lines is set with the trace-level
# `opacity` attribute. plotly's scatter `line` list has no `opacity` or
# `alpha` entry, so placing those keys inside `line` had no effect.
plot_bus %>%
  plot_ly(
    x = ~date, y = ~value, type = "scatter", mode = "markers",
    color = ~variable, text = ~text_label
  ) %>%
  layout(
    title = "Bus Ridership Trends 2019 - 2020",
    # Tick labels show month/day only (year dropped).
    xaxis = list(title = "Month/Day", tickformat = "%m/%d"),
    yaxis = list(title = "Ridership")
  ) %>%
  add_lines(
    x = as.Date("2020-03-01"),
    line = list(dash = "dot", color = "red", width = 0.5),
    opacity = 0.5,
    name = "First case on 3/1"
  ) %>%
  add_lines(
    x = as.Date("2020-04-07"),
    line = list(dash = "dot", color = "red", width = 0.5),
    opacity = 0.5,
    name = "100K cases in NYC on 04/07"
  ) %>%
  add_lines(
    x = as.Date("2020-05-26"),
    line = list(dash = "dot", color = "red", width = 0.5),
    opacity = 0.5,
    name = "200K cases in NYC on 05/26"
  )